VINTRA
Turinys
VINTRA¶
import numpy as np
import pandas
import pandas as pd
from itables import init_notebook_mode
import os
import subprocess
# Switch itables into notebook mode with all_interactive=True
# (presumably this makes every displayed DataFrame an interactive table --
# confirm against the installed itables version).
init_notebook_mode(all_interactive=True)
# Directory the validator is run from, and the directory holding the GTFS archives.
working_directory = f'{os.getcwd()}/../data/gtfs/vintra/'
gtfs_files_directory = f'{working_directory}/gtfs-files/'

# One dict per validated archive: {'failas': <archive name>, '<file>.txt': <value>}.
# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame per call.
gtfs_file_stats_rows = []
for file in sorted(os.listdir(gtfs_files_directory)):
    if not file.endswith('.zip'):
        continue
    filename, _, _ = file.partition('.zip')
    # Arguments are passed as a list without a shell, so archive names that
    # contain spaces or shell metacharacters reach the validator unchanged.
    # subprocess.run also kills the child when the timeout expires, whereas
    # Popen.communicate(timeout=...) leaves it running.
    p = subprocess.run(
        [
            'java', '-jar', 'gtfs-validator-301.jar',
            '-i', f'gtfs-files/{file}',
            '-o', 'reports',
            '-v', f'{filename}_report.json',
            '-e', f'{filename}_system_errors.json',
            '-n',
            '-c', 'lt',
        ],
        cwd=working_directory,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=60,
    )
    out, err = p.stdout, p.stderr
    errcode = p.returncode
    # Only the text after the first 'seconds\n' marker is parsed; each of its
    # lines is expected to look like '<gtfs file name>\t<value>'.
    _, _, gtfs_files_txt = out.decode("utf-8").partition('seconds\n')
    gtfs_files = gtfs_files_txt.splitlines()
    gtfs_files_dict = {'failas': filename}
    for gtfs_file_rep in gtfs_files:
        gtfs_file, tab, c = gtfs_file_rep.partition('\t')
        if not tab:
            # Blank or otherwise tab-less line: nothing to record.
            continue
        # 'MISSING_FILE' is stored as None so that it counts as a missing value.
        gtfs_files_dict[gtfs_file] = c if c != 'MISSING_FILE' else None
    gtfs_file_stats_rows.append(gtfs_files_dict)
gtfs_file_stats_df = pd.DataFrame(gtfs_file_stats_rows)
# GTFS files whose absence is marked with a cross in the overview table.
cross_marked_gtfs_files = [
    'agency.txt',
    'calendar.txt',
    'calendar_dates.txt',
    'routes.txt',
    'shapes.txt',
    'stop_times.txt',
    'stops.txt',
    'trips.txt',
    'fare_attributes.txt',
    'fare_rules.txt',
]
# GTFS files whose absence is only marked with a warning sign.
warning_marked_gtfs_files = [
    'attributions.txt',
    'feed_info.txt',
    'frequencies.txt',
    'levels.txt',
    'pathways.txt',
    'transfers.txt',
    'translations.txt',
]

# Fix the column order (columns absent from the data are created empty) and
# index the table by archive name.
gtfs_file_stats_df = gtfs_file_stats_df.reindex(
    columns=['failas', *cross_marked_gtfs_files, *warning_marked_gtfs_files]
).set_index('failas')

# Fill the cross-marked columns first; every value still missing afterwards
# belongs to a warning-marked column.
gtfs_file_stats_df[cross_marked_gtfs_files] = (
    gtfs_file_stats_df[cross_marked_gtfs_files].fillna('❌')
)
gtfs_file_stats_df.fillna('⚠️', inplace=True)

# NOTE: the former `gtfs_file_stats_df.style.set_sticky(axis="index")` statement
# was dropped: it built a throw-away Styler and never affected the DataFrame
# displayed below.
gtfs_file_stats_df
| agency.txt | calendar.txt | calendar_dates.txt | routes.txt | shapes.txt | stop_times.txt | stops.txt | trips.txt | fare_attributes.txt | fare_rules.txt | attributions.txt | feed_info.txt | frequencies.txt | levels.txt | pathways.txt | transfers.txt | translations.txt |
|---|
| Loading... (need help?) |
import json
# Directory the validator reports are read from.
reports_dir = f'{working_directory}/reports/'

# One row per notice entry of every '*report.json' file. Rows are gathered in a
# list and converted once: DataFrame.append was removed in pandas 2.0 and
# copied the whole frame for every notice.
gtfs_notice_rows = []
for file in sorted(os.listdir(reports_dir)):
    if not file.endswith('report.json'):
        continue
    gtfs_filename, _, _ = file.partition('_report.json')
    # Read as UTF-8 explicitly instead of relying on the locale default.
    with open(os.path.join(reports_dir, file), encoding='utf-8') as fp:
        data = json.load(fp)
    gtfs_notice_rows.extend(
        {
            'failas': gtfs_filename,
            'klaida': notice['code'],
            'sunkumas': notice['severity'],
            'viso': notice['totalNotices'],
        }
        for notice in data['notices']
    )

# Explicit columns keep the frame usable (and 'viso' present) even when no
# report contained any notice.
gtfs_notices_df = pd.DataFrame(
    gtfs_notice_rows, columns=['failas', 'klaida', 'sunkumas', 'viso']
)
gtfs_notices_df['viso'] = pd.to_numeric(gtfs_notices_df['viso'], downcast='integer')
GTFS patikrinimas¶
GTFS patikrinimo klaidos¶
def show_notices_table_by_severity(severity: str) -> "pd.io.formats.style.Styler":
    """Return a styled archive-by-notice-code table for one severity level.

    Rows of the module-level ``gtfs_notices_df`` whose ``'sunkumas'`` equals
    *severity* are pivoted into a table indexed by ``'failas'`` with one column
    per ``'klaida'``; cells hold the summed ``'viso'`` (0 where a combination
    does not occur) and an ``'All'`` margin row/column is appended.

    Args:
        severity: Value of the ``'sunkumas'`` column to select, e.g. ``'ERROR'``.

    Returns:
        A pandas ``Styler`` with a sticky index and an orange background on
        every cell greater than zero.
    """
    gtfs_errors_df = gtfs_notices_df[gtfs_notices_df['sunkumas'] == severity].drop(columns=['sunkumas'])
    gtfs_errors_df = gtfs_errors_df.pivot_table(
        index='failas',
        columns='klaida',
        values='viso',
        aggfunc='sum',
        margins=True,
        fill_value=0,
    )
    # Each `.style` access creates a new Styler, so both settings must be
    # chained on the same object; a separate `.style.set_sticky(...)`
    # statement would be silently discarded.
    return (
        gtfs_errors_df.style
        .set_sticky(axis="index")
        .apply(lambda row: ["background: orange" if v > 0 else '' for v in row], axis=1)
    )
# Display the per-archive pivot of validator notices whose severity is 'ERROR'.
show_notices_table_by_severity('ERROR')
| klaida | decreasing_or_equal_stop_time_distance | duplicate_fare_rule_zone_id_fields | equal_shape_distance_diff_coordinates | missing_required_file | All |
|---|---|---|---|---|---|
| failas | |||||
| AnyksciuR | 0 | 0 | 1 | 0 | 1 |
| Birstono | 0 | 0 | 0 | 5 | 5 |
| BirzuR | 0 | 0 | 0 | 4 | 4 |
| Druskininku | 0 | 0 | 0 | 4 | 4 |
| IgnalinosR | 0 | 0 | 5 | 0 | 5 |
| JonavosR | 0 | 0 | 1 | 0 | 1 |
| JoniskioR | 0 | 0 | 1 | 0 | 1 |
| JurbarkoR | 0 | 0 | 2 | 0 | 2 |
| Kalvarijos | 0 | 0 | 0 | 4 | 4 |
| KaunoM | 0 | 0 | 0 | 4 | 4 |
| Kazlurudos | 0 | 0 | 0 | 4 | 4 |
| KedainiuR | 0 | 0 | 17 | 0 | 17 |
| KelmesR | 0 | 0 | 13417 | 0 | 13417 |
| KlaipedosR | 0 | 0 | 13417 | 0 | 13417 |
| KupiskioR | 0 | 0 | 0 | 4 | 4 |
| LTSAR | 0 | 0 | 10317 | 0 | 10317 |
| LazdijuR | 0 | 0 | 0 | 4 | 4 |
| PakruojoR | 0 | 0 | 0 | 4 | 4 |
| PanevezioM | 0 | 0 | 0 | 4 | 4 |
| PanevezioR | 0 | 0 | 0 | 4 | 4 |
| PasvalioR | 0 | 0 | 13416 | 0 | 13416 |
| PrienuR | 0 | 0 | 0 | 4 | 4 |
| RadviliskioR | 0 | 0 | 1 | 0 | 1 |
| RaseiniuR | 0 | 0 | 1 | 0 | 1 |
| RokiskioR | 0 | 0 | 1 | 0 | 1 |
| SilutesR | 0 | 0 | 7 | 0 | 7 |
| SkuodoR | 0 | 0 | 1 | 0 | 1 |
| TrakuR | 0 | 0 | 1 | 0 | 1 |
| UkmergesR | 0 | 0 | 1 | 0 | 1 |
| UtenosR | 0 | 0 | 1 | 0 | 1 |
| VarenosR | 0 | 0 | 1 | 0 | 1 |
| ZarasuR | 0 | 0 | 1 | 0 | 1 |
| birstono-gtfs | 0 | 0 | 0 | 5 | 5 |
| google_transit | 0 | 0 | 6 | 0 | 6 |
| gtfs_all | 8 | 2368 | 10327 | 0 | 12703 |
| All | 8 | 2368 | 60943 | 54 | 63373 |
GTFS patikrinimo įspėjimai¶
# Display the per-archive pivot of validator notices whose severity is 'WARNING'.
show_notices_table_by_severity('WARNING')
| klaida | duplicate_route_name | equal_shape_distance_same_coordinates | fast_travel_between_consecutive_stops | fast_travel_between_far_stops | leading_or_trailing_whitespaces | missing_timepoint_column | missing_timepoint_value | same_route_and_agency_url | stop_too_far_from_shape | stop_too_far_from_shape_using_user_distance | stops_match_shape_out_of_order | unexpected_enum_value | All |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| failas | |||||||||||||
| AlytausM | 0 | 0 | 0 | 0 | 0 | 0 | 5744 | 0 | 0 | 0 | 0 | 0 | 5744 |
| AlytausR | 0 | 3 | 0 | 0 | 0 | 0 | 2885 | 0 | 0 | 0 | 0 | 0 | 2888 |
| AnyksciuR | 0 | 0 | 0 | 0 | 0 | 0 | 2627 | 0 | 2 | 0 | 0 | 0 | 2629 |
| Birstono | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| BirzuR | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| Druskininku | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| Elektrenu | 0 | 0 | 0 | 0 | 0 | 0 | 434 | 0 | 0 | 0 | 0 | 0 | 434 |
| IgnalinosR | 0 | 0 | 0 | 0 | 0 | 0 | 1608 | 0 | 5 | 0 | 0 | 0 | 1613 |
| JonavosR | 0 | 0 | 0 | 0 | 1 | 0 | 6866 | 0 | 3 | 0 | 0 | 0 | 6870 |
| JoniskioR | 0 | 4 | 0 | 0 | 0 | 0 | 880 | 0 | 0 | 0 | 0 | 0 | 884 |
| JurbarkoR | 0 | 0 | 0 | 0 | 2 | 0 | 941 | 0 | 1 | 0 | 0 | 0 | 944 |
| Kalvarijos | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| KaunoM | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| KaunoR | 0 | 0 | 0 | 0 | 0 | 0 | 13005 | 0 | 0 | 0 | 0 | 0 | 13005 |
| Kazlurudos | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| KedainiuR | 0 | 1 | 0 | 0 | 0 | 0 | 4685 | 0 | 2 | 0 | 1 | 0 | 4689 |
| KelmesR | 0 | 5536 | 8 | 3 | 3 | 0 | 19387 | 0 | 9 | 0 | 0 | 0 | 24946 |
| KiasiadoriuR | 0 | 0 | 0 | 0 | 0 | 0 | 46 | 0 | 0 | 0 | 0 | 0 | 46 |
| KlaipedosM | 0 | 0 | 0 | 0 | 0 | 0 | 217 | 0 | 0 | 0 | 0 | 0 | 217 |
| KlaipedosR | 0 | 5536 | 8 | 3 | 3 | 0 | 19387 | 0 | 9 | 0 | 0 | 0 | 24946 |
| KretingosR | 0 | 1 | 0 | 0 | 0 | 0 | 2748 | 0 | 0 | 0 | 0 | 0 | 2749 |
| KupiskioR | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| LTSAR | 0 | 4270 | 8 | 3 | 4 | 0 | 16101 | 0 | 7 | 0 | 0 | 0 | 20393 |
| LazdijuR | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| Marijampoles | 0 | 2 | 0 | 0 | 0 | 0 | 3996 | 0 | 0 | 0 | 0 | 0 | 3998 |
| MazeikiuR | 0 | 2 | 0 | 0 | 0 | 0 | 1909 | 0 | 3 | 0 | 0 | 0 | 1914 |
| MoletuR | 0 | 15 | 0 | 0 | 1 | 0 | 919 | 0 | 0 | 0 | 0 | 0 | 935 |
| PagegiuR | 0 | 0 | 0 | 0 | 0 | 0 | 94 | 0 | 0 | 0 | 0 | 0 | 94 |
| PakruojoR | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| PalangosM | 0 | 0 | 0 | 0 | 0 | 0 | 160 | 0 | 0 | 0 | 0 | 0 | 160 |
| PanevezioM | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| PanevezioR | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| PasvalioR | 0 | 5543 | 8 | 3 | 3 | 0 | 19531 | 0 | 7 | 0 | 0 | 0 | 25095 |
| PlungesR | 0 | 0 | 0 | 0 | 0 | 0 | 2687 | 0 | 1 | 0 | 0 | 0 | 2688 |
| PrienuR | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| RadviliskioR | 0 | 13 | 0 | 0 | 0 | 0 | 4077 | 0 | 0 | 0 | 0 | 0 | 4090 |
| RaseiniuR | 0 | 0 | 0 | 0 | 0 | 0 | 787 | 0 | 0 | 0 | 0 | 0 | 787 |
| Rietavo | 0 | 0 | 0 | 0 | 0 | 0 | 24 | 0 | 0 | 0 | 0 | 0 | 24 |
| RokiskioR | 0 | 3 | 0 | 0 | 0 | 0 | 2685 | 0 | 0 | 0 | 0 | 0 | 2688 |
| SalcininkuR | 0 | 0 | 0 | 0 | 0 | 0 | 1027 | 0 | 0 | 0 | 0 | 0 | 1027 |
| SiauliuM | 0 | 0 | 0 | 0 | 0 | 0 | 256 | 0 | 0 | 0 | 0 | 0 | 256 |
| SiauliuR | 0 | 10 | 0 | 0 | 0 | 0 | 2257 | 0 | 0 | 0 | 0 | 0 | 2267 |
| SilalesR | 0 | 2 | 0 | 0 | 0 | 0 | 367 | 0 | 0 | 0 | 0 | 0 | 369 |
| SilutesR | 0 | 0 | 0 | 0 | 0 | 0 | 1240 | 0 | 0 | 0 | 0 | 0 | 1240 |
| SkuodoR | 0 | 1 | 0 | 0 | 1 | 0 | 978 | 0 | 0 | 0 | 0 | 0 | 980 |
| SvencioniuR | 0 | 2 | 0 | 0 | 0 | 0 | 1962 | 0 | 0 | 0 | 0 | 0 | 1964 |
| TauragesR | 0 | 0 | 0 | 0 | 0 | 0 | 2082 | 0 | 1 | 0 | 0 | 0 | 2083 |
| Taurages_R | 0 | 0 | 0 | 0 | 0 | 0 | 2082 | 0 | 1 | 0 | 0 | 0 | 2083 |
| TrakuR | 0 | 23 | 0 | 0 | 1 | 0 | 2436 | 0 | 2 | 0 | 0 | 0 | 2462 |
| UkmergesR | 0 | 2 | 0 | 0 | 5 | 0 | 4968 | 0 | 0 | 0 | 0 | 0 | 4975 |
| UtenosR | 0 | 7 | 0 | 0 | 0 | 0 | 3900 | 0 | 0 | 0 | 0 | 0 | 3907 |
| VarenosR | 0 | 48 | 0 | 0 | 0 | 0 | 1967 | 0 | 0 | 0 | 0 | 0 | 2015 |
| VilkaviskioR | 0 | 0 | 1 | 1 | 0 | 0 | 1400 | 0 | 3 | 0 | 1 | 0 | 1406 |
| VilniausM | 0 | 0 | 0 | 0 | 0 | 0 | 477375 | 0 | 0 | 0 | 0 | 17 | 477392 |
| VilniausR | 0 | 0 | 0 | 0 | 0 | 0 | 870 | 0 | 0 | 0 | 0 | 0 | 870 |
| VisaginoM | 0 | 0 | 0 | 0 | 0 | 0 | 116 | 0 | 0 | 0 | 0 | 0 | 116 |
| ZarasuR | 0 | 7 | 0 | 0 | 0 | 0 | 1332 | 0 | 2 | 0 | 0 | 0 | 1341 |
| akmene | 0 | 46 | 0 | 0 | 0 | 0 | 1322 | 0 | 0 | 0 | 0 | 0 | 1368 |
| birstono-gtfs | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
| google_transit | 0 | 7 | 0 | 0 | 0 | 0 | 2940 | 0 | 7 | 0 | 0 | 0 | 2954 |
| gtfs_all | 1328 | 4325 | 340 | 97 | 0 | 0 | 61318 | 1923 | 56 | 1657 | 0 | 1 | 71045 |
| neringa | 0 | 0 | 0 | 0 | 0 | 0 | 329 | 0 | 0 | 0 | 0 | 0 | 329 |
| All | 1328 | 25409 | 373 | 110 | 24 | 13 | 706954 | 1923 | 121 | 1657 | 2 | 18 | 737932 |
Stotelės¶
from zipfile import ZipFile
import gtfs_functions as gtfs
import plotly.express as px
# stops.txt of every archive (gtfs_all.zip is skipped explicitly), each tagged
# with the archive it came from. Frames are collected and concatenated once;
# concatenating inside the loop re-copies all previously read rows every time.
stops_frames = []
for file in sorted(os.listdir(gtfs_files_directory)):
    if not file.endswith('.zip') or file == 'gtfs_all.zip':
        continue
    filename, _, _ = file.partition('.zip')
    with ZipFile(os.path.join(gtfs_files_directory, file)) as gtfs_zip:
        if "stops.txt" not in gtfs_zip.namelist():
            continue
        # Close the archive member as soon as it has been parsed.
        with gtfs_zip.open("stops.txt") as stops_csv:
            stops_df = pd.read_csv(stops_csv)
    stops_df['failas'] = filename
    stops_frames.append(stops_df)

# pd.concat raises on an empty list, so fall back to an empty frame.
all_stops = pd.concat(stops_frames) if stops_frames else pd.DataFrame()
from typing import Optional

# Attribution HTML attached to the map layer of every stops figure.
STOPS_MAP_ATTRIBUTION = '© <a href="https://judumas.vycius.lt" target="_blank">Karolis Vyčius</a> © <a href="https://www.visimarsrutai.lt/gtfs/" target="_blank">Visimarsrutai.lt</a>'


def build_stops_map(stops_df: pd.DataFrame, title: str, color: Optional[str] = None):
    """Build a Mapbox scatter figure of stops.

    Args:
        stops_df: Frame with ``stop_lat``, ``stop_lon`` and ``stop_name`` columns.
        title: Figure title.
        color: Optional column name used to colour the points; ``None``
            (the plotly default) leaves all points in one colour.

    Returns:
        The configured plotly figure; the caller decides when to show it.
    """
    fig = px.scatter_mapbox(
        data_frame=stops_df,
        lat='stop_lat',
        lon='stop_lon',
        mapbox_style="light",
        zoom=6,
        title=title,
        hover_name='stop_name',
        color=color,
    )
    fig.update_layout(mapbox_layers=[{"sourceattribution": STOPS_MAP_ATTRIBUTION}])
    # Only the right, left and bottom margins are removed; the top margin is
    # left at its default.
    fig.update_layout(margin={"r": 0, "l": 0, "b": 0})
    return fig


# Read the token through a context manager so the file is closed, and strip
# surrounding whitespace (a trailing newline would become part of the token).
with open("../.mapbox_token") as token_file:
    mapbox_access_token = token_file.read().strip()
px.set_mapbox_access_token(mapbox_access_token)

# Stops of all archives, coloured by source archive.
fig = build_stops_map(all_stops, title='Stotelės', color='failas')
fig.show()

# Stops of the google_transit.zip archive only.
with ZipFile(os.path.join(gtfs_files_directory, 'google_transit.zip')) as gtfs_zip:
    with gtfs_zip.open("stops.txt") as stops_csv:
        google_transit_vintra_stops_df = pd.read_csv(stops_csv)

fig = build_stops_map(google_transit_vintra_stops_df, title='Google Maps stotelės iš Vintra')
fig.show()